What is STAG2? What should we expect if we knocked out STAG2 from a cell?
STAG2 is a important subunit in the cohesin complex that plays an important role in regulating sister chromatid alignment during cell division, and other genetic regulatory functions. Studies show that STAG2-mut EWS have higher rates of metastatic disease and worse outcomes. It is expected that the TC71 & A673 cell lines will have differing phenotype effects to STAG2 KO, for example STAG2 deletion will lead to TC71 growth defect but with A673 it will lead to a growth advantage. STAG1 levels could also increase, in order to possibly fill in for the removal of STAG2 in the cell cycle, a decrease of RAD21 is also observed. The cohesin complex also mediates intrachromosomal interactions including those conjoining enhancers to promoters. Loss of STAG2 produces highly consistent and stable transcriptional changes that may undergo selection to confer a competitive advantage. Two neurodvelopmental transcription factors, POU3F2 and NR2F1 were consistently upregulated in STAG2 KO studies. (Adane et al, 2022)
All samples are downloaded from the SRA Run Selector BioProject PRJNA549593, slight quality control via fastp was conducted and then the reads were aligned using Salmon for Transcript-level quantification files. The metadata for the samples was provided from the SRA Run Selector also. 3 sample comparisons was conducted using 1. SA2 KO vs WT in A673 cells 2. SA2 KO vs WT in TC71 cells 3. SA2 KO in TC71 cells VS A673 cells.
samples <- read.table(file.path("Analysis/SraRunTable.txt"), sep = ",", header = TRUE) %>%
dplyr::mutate(cell_line = ifelse(grepl("A673", x = source_name), "A673", "TC71")) %>%
dplyr::mutate(condition = ifelse(grepl("WT|siCT", x = GENOTYPE), "Control", "Treatment"))
A673_samples <- samples %>%
dplyr::filter(GENOTYPE %in% c("WT", "SA2 KO") & cell_line == "A673")
TC71_samples <- samples %>%
dplyr::filter(GENOTYPE %in% c("SA2 KO", "WT") & cell_line == "TC71")
SA2KO_samples <- samples %>%
dplyr::filter(GENOTYPE == "SA2 KO")
A673_salmon_files <- file.path("Salmon/salmon.out", A673_samples$Run, "quant.sf") %>%
setNames(object = , A673_samples$Run)
TC71_salmon_files <- file.path("Salmon/salmon.out", TC71_samples$Run, "quant.sf") %>%
setNames(object = , TC71_samples$Run)
SA2KO_salmon_files <- file.path("Salmon/salmon.out", SA2KO_samples$Run, "quant.sf") %>%
setNames(object = , SA2KO_samples$Run)
ensdb <- EnsDb.Hsapiens.v86
transcripts <- transcripts(ensdb, columns = c(listColumns(ensdb, "tx"), "gene_name"),
return.type = "data.frame") %>%
as_tibble() %>%
dplyr::select(tx_id, gene_name)
A673_txi <- tximport(A673_salmon_files, type = "salmon", tx2gene = transcripts, ignoreTxVersion = TRUE)
TC71_txi <- tximport(TC71_salmon_files, type = "salmon", tx2gene = transcripts, ignoreTxVersion = TRUE)
SA2KO_txi <- tximport(SA2KO_salmon_files, type = "salmon", tx2gene = transcripts, ignoreTxVersion = TRUE)
A673_dds_txi <- DESeqDataSetFromTximport(A673_txi, colData = A673_samples, design = ~condition)
TC71_dds_txi <- DESeqDataSetFromTximport(TC71_txi, colData = TC71_samples, design = ~condition)
SA2KO_dds_txi <- DESeqDataSetFromTximport(SA2KO_txi, colData = SA2KO_samples, design = ~cell_line)
vst <- vst(A673_dds_txi)
A673_PCA <- plotPCA(vst, intgroup = c("cell_line", "GENOTYPE"), returnData = TRUE)
A673_percentVar <- round(100 * attr(A673_PCA, "percentVar"))
ggplot(A673_PCA, aes(PC1, PC2, color = GENOTYPE)) +
geom_point(size = 3) +
ggtitle("PCA Plot for A673 STAG2KO samples" ) +
xlab(paste0("PC1: ", A673_percentVar[1], "% variance")) +
ylab(paste0("PC2: ", A673_percentVar[2], "% variance")) +
coord_fixed()
TC71_vst <- vst(TC71_dds_txi)
TC71_PCA <- plotPCA(TC71_vst, intgroup = c("cell_line", "GENOTYPE"), returnData = TRUE)
TC71_percentVar <- round(100 * attr(TC71_PCA, "percentVar"))
ggplot(TC71_PCA, aes(PC1, PC2, color = GENOTYPE)) +
geom_point(size = 3) +
ggtitle("PCA Plot for TC71 STAG2KO samples" ) +
xlab(paste0("PC1: ", A673_percentVar[1], "% variance")) +
ylab(paste0("PC2: ", A673_percentVar[2], "% variance")) +
coord_fixed()
SA2KO_vst <- vst(SA2KO_dds_txi)
SA2KO_PCA <- plotPCA(SA2KO_vst, intgroup = c("cell_line", "GENOTYPE"), returnData = TRUE)
SA2KO_percentVar <- round(100 * attr(SA2KO_PCA, "percentVar"))
ggplot(SA2KO_PCA, aes(PC1, PC2, color = cell_line)) +
geom_point(size = 3) +
ggtitle("PCA Plot for All STAG2KO samples" ) +
xlab(paste0("PC1: ", SA2KO_percentVar[1], "% variance")) +
ylab(paste0("PC2: ", SA2KO_percentVar[2], "% variance")) +
coord_fixed()
A673_metadata <- colData(A673_dds_txi)
TC71_metadata <- colData(TC71_dds_txi)
all_metadata <- rbind(A673_metadata, TC71_metadata)
all_metadata %>%
as.data.frame() %>%
dplyr::select(GENOTYPE, cell_line) %>%
kbl(caption = "Table 1: Sample Overview") %>%
kable_styling(bootstrap_options = "striped", full_width = T, html_font = "Cambria")
| GENOTYPE | cell_line | |
|---|---|---|
| SRR9326185 | SA2 KO | A673 |
| SRR9326186 | SA2 KO | A673 |
| SRR9326187 | SA2 KO | A673 |
| SRR9326195 | WT | A673 |
| SRR9326196 | WT | A673 |
| SRR9326197 | WT | A673 |
| SRR9326198 | SA2 KO | TC71 |
| SRR9326199 | SA2 KO | TC71 |
| SRR9326200 | SA2 KO | TC71 |
| SRR9326201 | SA2 KO | TC71 |
| SRR9326202 | SA2 KO | TC71 |
| SRR9326203 | SA2 KO | TC71 |
| SRR9326208 | WT | TC71 |
| SRR9326209 | WT | TC71 |
| SRR9326210 | WT | TC71 |
EnhancedVolcano(A673_sig_ordered_result,
lab = A673_sig_ordered_result$Gene_name,
x = "log2FoldChange",
y = "pvalue",
title = "Siginifcant Genes for STAG2KO in A673 cells",
subtitle = "",
pointSize = 1.0,
labSize = 4.0,
xlim = c(min(A673_sig_ordered_result$log2FoldChange), max(A673_sig_ordered_result$log2FoldChange)),
ylim = c(0, 300)
)
EnhancedVolcano(TC71_sig_ordered_result,
lab = TC71_sig_ordered_result$Gene_name,
x = "log2FoldChange",
y = "pvalue",
title = "Siginifcant Genes for STAG2KO in TC71 Cells versus Control",
subtitle = "",
pointSize = 1.0,
labSize = 4.0,
xlim = c(min(TC71_sig_ordered_result$log2FoldChange), max(TC71_sig_ordered_result$log2FoldChange)),
ylim = c(0, 200)
)
EnhancedVolcano(SA2KO_sig_ordered_result,
lab = SA2KO_sig_ordered_result$Gene_name,
x = "log2FoldChange",
y = "pvalue",
title = "Siginifcant Genes for between the STAG2KO samples in A673 and TC71 cell lines",
subtitle = "",
pointSize = 1.0,
labSize = 4.0,
xlim = c(min(SA2KO_sig_ordered_result$log2FoldChange), max(SA2KO_sig_ordered_result$log2FoldChange)),
ylim = c(0, 200)
)
A673_table_result <- dplyr::select(A673_sig_ordered_result, Gene_name, log2FoldChange, stat, pvalue, padj)
datatable(A673_table_result, class = 'cell-border stripe',
caption = "Table 2: A672 STAG2KO Differentally Significant Genes")
TC71_table_result <- dplyr::select(TC71_sig_ordered_result, Gene_name, log2FoldChange, stat, pvalue, padj)
datatable(TC71_table_result, class = 'cell-border stripe',
caption = "Table 3: TC71 STAG2KO Differentally Significant Genes")
SA2KO_table_result <- dplyr::select(SA2KO_sig_ordered_result, Gene_name, log2FoldChange, stat, pvalue, padj)
datatable(SA2KO_table_result, class = 'cell-border stripe',
caption = "Table 4: STAG2KO samples Differentally Significant Genes")
A673_heatmap <- pheatmap(A673_sig_norm_dds_counts,
main = "Top 10 Over- and Under- expressed A673 STAG2KO DEGs",
color = palette(200),
cluster_rows = FALSE,
cluster_cols = FALSE,
show_rownames = TRUE,
annotation = dplyr::select(A673_heat_meta, condition),
scale = "row")
TC71_heatmap <- pheatmap(TC71_sig_norm_dds_counts,
main = "Top 10 Over- and Under- expressed TC71 STAG2KO DEGs",
color = palette(200),
cluster_rows = FALSE,
cluster_cols = FALSE,
show_rownames = TRUE,
annotation = dplyr::select(TC71_heat_meta, condition),
scale = "row")
SA2KO_heatmap <- pheatmap(SA2KO_sig_norm_dds_counts,
main = "Top 10 Over- and Under- expressed STAG2KO sample DEGs",
color = palette(200),
cluster_rows = FALSE,
cluster_cols = FALSE,
show_rownames = TRUE,
annotation = dplyr::select(SA2KO_heat_meta, cell_line),
scale = "row")
A673_ordered_gse <- A673_gse %>%
dplyr::arrange(desc(abs(NES)))
A673_ordered_gse_df <- A673_ordered_gse %>%
as_tibble() %>%
dplyr::select(ONTOLOGY, ID, Description, NES, pvalue, p.adjust, qvalue)
datatable(A673_ordered_gse_df, class = 'cell-border stripe',
caption = "Table 5: GSEA enrichment results for A673 STAG2KO significant DEGs")
gseaplot2(A673_ordered_gse, geneSetID = 1, title = A673_ordered_gse$Description[1])
TC71_ordered_gse <- TC71_gse %>%
dplyr::arrange(desc(abs(NES)))
TC71_ordered_gse_df <- TC71_ordered_gse %>%
as_tibble() %>%
dplyr::select(ONTOLOGY, ID, Description, NES, pvalue, p.adjust, qvalue)
datatable(TC71_ordered_gse_df, class = 'cell-border stripe',
caption = "Table 6: GSEA enrichment results for TC71 STAG2KO significant DEGs")
gseaplot2(TC71_ordered_gse, geneSetID = 1, title = TC71_ordered_gse$Description[1])
SA2KO_ordered_gse <- SA2KO_gse %>%
dplyr::arrange(desc(abs(NES)))
SA2KO_ordered_gse_df <- SA2KO_ordered_gse %>%
as_tibble() %>%
dplyr::select(ONTOLOGY, ID, Description, NES, pvalue, p.adjust, qvalue)
datatable(SA2KO_ordered_gse_df, class = 'cell-border stripe',
caption = "Table 7: GSEA enrichment results for all STAG2KO samples significant DEGs")
gseaplot2(SA2KO_ordered_gse, geneSetID = 1, title = SA2KO_ordered_gse$Description[1])
A673_resRmd <- llply(names(A673_gene_list), function(groupNow) {
genesNow <- A673_gene_list[[groupNow]]
response <- httr::POST(
url = 'https://maayanlab.cloud/Enrichr/addList',
body = list(
'list' = paste0(genesNow, collapse = "\n"),
'description' = groupNow
)
)
response <- jsonlite::fromJSON(httr::content(response, as = "text"))
permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=",
response$shortId[1])
knitr::knit_child(text = c(
'### `r groupNow`',
'',
'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
''
),
envir = environment(),
quiet = TRUE)
})
cat(unlist(A673_resRmd), sep = '\n')
Enrichr Link: Over-expressed.
Enrichr Link: Under-expressed.
TC71_resRmd <- llply(names(TC71_gene_list), function(groupNow) {
genesNow <- TC71_gene_list[[groupNow]]
response <- httr::POST(
url = 'https://maayanlab.cloud/Enrichr/addList',
body = list(
'list' = paste0(genesNow, collapse = "\n"),
'description' = groupNow
)
)
response <- jsonlite::fromJSON(httr::content(response, as = "text"))
permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=",
response$shortId[1])
knitr::knit_child(text = c(
'### `r groupNow`',
'',
'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
''
),
envir = environment(),
quiet = TRUE)
})
cat(unlist(TC71_resRmd), sep = '\n')
Enrichr Link: Over-expressed.
Enrichr Link: Under-expressed.
SA2KO_resRmd <- llply(names(SA2KO_gene_list), function(groupNow) {
genesNow <- SA2KO_gene_list[[groupNow]]
response <- httr::POST(
url = 'https://maayanlab.cloud/Enrichr/addList',
body = list(
'list' = paste0(genesNow, collapse = "\n"),
'description' = groupNow
)
)
response <- jsonlite::fromJSON(httr::content(response, as = "text"))
permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=",
response$shortId[1])
knitr::knit_child(text = c(
'### `r groupNow`',
'',
'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
''
),
envir = environment(),
quiet = TRUE)
})
cat(unlist(SA2KO_resRmd), sep = '\n')
Enrichr Link: Over-expressed.
Enrichr Link: Under-expressed.
Adane B, Alexe G, Seong BKA, Lu D, Hwang EE, Hnisz D, Lareau CA, Ross L, Lin S, Dela Cruz FS, Richardson M, Weintraub AS, Wang S, Iniguez AB, Dharia NV, Conway AS, Robichaud AL, Tanenbaum B, Krill-Burger JM, Vazquez F, Schenone M, Berman JN, Kung AL, Carr SA, Aryee MJ, Young RA, Crompton BD, Stegmaier K. STAG2 loss rewires oncogenic and developmental programs to promote metastasis in Ewing sarcoma. Cancer Cell. 2021 Jun 14;39(6):827-844.e10. doi: 10.1016/j.ccell.2021.05.007. PMID: 34129824; PMCID: PMC8378827.